* SET UP A DATA SET WITH OW AND NW MUNICIPALITIES *


* --- session setup ---
* Start from a clean session: no data in memory, no open log, no stored matrices.
* The two capture prefixes keep the script running when there is nothing to close or clear.
clear
capture log close
capture clear matrix
* Do not pause the output with -more- prompts.
set more off
* Request 500m of memory.
set memory 500m

* Project folder: holds the helper do-file, the CPI file and the Datasets output folder
* referenced further down.
global yourpath = "PATHESTV\Sinergia\Isabel_Martinez\OW\setup_canton_DiD_data"

* * * * * TAXPAYERS * * * * * * * * * TAXPAYERS * * * * * * * * * TAXPAYERS * * * *  
* Stack the taxpayer files NP_new_1 to NP_new_26 (years 1989-2010, VP25-10).
* NOTE(review): file 1 is read from the PATHESTV root while files 2-26 are read from
* the O: drive root - confirm that both roots point to the same folder.
use "PATHESTV\Sinergia\VP_all_per_cant\NP_new_1.dta", clear
forvalues i = 2/26 {
	append using "O:\STP\Externe\Sinergia\VP_all_per_cant\NP_new_`i'.dta"
}

* Append the light files of the remaining tax periods, VP19-VP23 and VP11-VP16
* (VP24 does not exist), 26 files per period.
foreach vp of numlist 19/23 11/16 {
	forvalues i = 1/26 {
		append using "PATHESTV\Sinergia\VP`vp'\NP_`i'_light.dta"
	}
	* kanahv is not needed; it is removed again after every appended period.
	drop kanahv
}

* Adjust years / tax periods with the project helper do-file.
run "$yourpath/recode_periods+years_NP.do"
sort year gdekannr
tabulate year

* Retain only the variables that are used in the remainder of this script.
keep year gdekannr snp_gdenr snp_ahvnr snp_stacd snpbercd snpzivcd snpanzki snpsteink snpreink snpstbetr
	
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* PREP THE DATA
* SORT OUT SOME CATEGORICAL VARIABLES

* Civil status: observations with code 0 and at least one child are set to 3
* (married with children).
* The snpanzki<. guard is required because Stata orders missing values above every
* number, so snpanzki>0 on its own is also true when the number of children is missing.
replace snpzivcd = 3 if snpzivcd==0 & snpanzki>0 & snpanzki<.
drop snpanzki

* Labor market status: snpbercd arrives as a string. Map the letter codes to numeric
* strings first so that the variable can be converted with destring.
replace snpbercd = "22" if inlist(snpbercd, "-", "X")
replace snpbercd = "10" if snpbercd=="N"				/*non-working*/
destring snpbercd, replace

* Final coding: 0 = employee, 1 = self-employed, 2 = retiree, 10 = non-working.
* The raw ranges 0-6 and 7-9 do not overlap with the values assigned to each other,
* so each range can be recoded in a single step.
replace snpbercd = 1 if snpbercd >= 0 & snpbercd <= 6	/*self-employed*/
replace snpbercd = 0 if snpbercd >= 7 & snpbercd <= 9	/*employee*/
replace snpbercd = 2 if snpbercd == 22					/*retiree*/


* DEFLATE MONETARY VARIABLES
* Attach the price index (cpi05) to every observation by year.
* keep(master match) discards years that exist only in the CPI file. Without it those
* years stay in memory as observations without any taxpayer information (their year is
* not missing, so the drop below does not remove them) and they end up as empty groups
* in every collapsed dataset that is saved further down.
merge m:1 year using "$yourpath/CPI2005_1971-2019.dta", keep(master match) nogenerate
drop if year==.

* Rescale the monetary variables to values of Dec 2005.
* NOTE(review): 100.5 is presumably the index level of Dec 2005 - confirm.
foreach var in snpsteink snpreink snpstbetr {
	replace `var' = `var'/cpi05*100.5
}

* GENERATE 2 INCOME INDICATORS
* treat:   taxable income above 3000 (the >300K group); a missing income gives 0
* control: taxable income strictly between 2000 and 2950 (the 200K-295K group)
generate treat   = snpsteink > 3000 & !missing(snpsteink)
generate control = snpsteink > 2000 & snpsteink < 2950


* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

* COLLAPSE AND SAVE THE DATA *
* Variante A: collapse by all categories simultaneously *
* One row per year x municipality x civil status x labor market status x snp_stacd,
* holding the taxpayer count plus mean and standard error of the mean of the three
* monetary variables. preserve/restore brings the micro data back afterwards.
preserve
collapse (count) n_taxpayers=snp_ahvnr ///
	(mean) steink=snpsteink reink=snpreink stbetr=snpstbetr ///
	(sem) steink_se=snpsteink reink_se=snpreink stbetr_se=snpstbetr ///
	, by(year snp_gdenr snpzivcd snpbercd snp_stacd)
sort year snp_gdenr
compress
save "$yourpath/Datasets/allcant_taxpayers_coll_A.dta", replace
restore




* Variante A_treatment: collapse by one category and the treatment dummy simultaneously *
* Three collapsed tempfiles are built (one per category) and then merged on
* year and snp_gdenr. Each tempfile also carries cant, the group mean of gdekannr.

* (1) civil status x treat, variables carry the suffix _z
preserve
collapse (count) n_taxpayers_z=snp_ahvnr ///
	(mean) cant=gdekannr steink_z=snpsteink reink_z=snpreink stbetr_z=snpstbetr ///
	(sem) steink_se_z=snpsteink reink_se_z=snpreink stbetr_se_z=snpstbetr ///
	, by(year snp_gdenr treat snpzivcd)
sort year snp_gdenr
rename treat treat_z
compress
tempfile A2_1
save "`A2_1'"
restore

* (2) labor market status x treat, variables carry the suffix _b
preserve
collapse (count) n_taxpayers_b=snp_ahvnr ///
	(mean) cant=gdekannr steink_b=snpsteink reink_b=snpreink stbetr_b=snpstbetr ///
	(sem) steink_se_b=snpsteink reink_se_b=snpreink stbetr_se_b=snpstbetr ///
	, by(year snp_gdenr treat snpbercd)
sort year snp_gdenr
rename treat treat_b
compress
tempfile A2_2
save "`A2_2'"
restore

* (3) snp_stacd x treat, variables carry the suffix _t
* This preserve stays open until the merged file has been saved; the restore at the
* end of the section brings the micro data back.
preserve
collapse (count) n_taxpayers_t=snp_ahvnr ///
	(mean) cant=gdekannr steink_t=snpsteink reink_t=snpreink stbetr_t=snpstbetr ///
	(sem) steink_se_t=snpsteink reink_se_t=snpreink stbetr_se_t=snpstbetr ///
	, by(year snp_gdenr treat snp_stacd)
sort year snp_gdenr
rename treat treat_t
compress
tempfile A2_3
save "`A2_3'"

* Combine the three pieces side by side.
* NOTE(review): year and snp_gdenr do not uniquely identify rows in any of the three
* files, so merge m:m pairs rows by their position within each key and reuses the last
* row of the shorter side. Rows of the different categories are therefore aligned
* arbitrarily - confirm that downstream code only reads each suffix group on its own.
use "`A2_1'", clear
merge m:m year snp_gdenr using "`A2_2'", nogenerate
merge m:m year snp_gdenr using "`A2_3'", nogenerate
save "$yourpath/Datasets/allcant_taxpayers_coll_A_treatment.dta", replace
restore


* Variante A_control: collapse by one category and the control dummy simultaneously *
* Same layout as the treatment variant, with control instead of treat and a trailing c
* in every suffix. Each tempfile also carries cant, the group mean of gdekannr.

* (1) civil status x control, variables carry the suffix _zc
preserve
collapse (count) n_taxpayers_zc=snp_ahvnr ///
	(mean) cant=gdekannr steink_zc=snpsteink reink_zc=snpreink stbetr_zc=snpstbetr ///
	(sem) steink_se_zc=snpsteink reink_se_zc=snpreink stbetr_se_zc=snpstbetr ///
	, by(year snp_gdenr control snpzivcd)
sort year snp_gdenr
rename control control_zc
compress
tempfile A2_1c
save "`A2_1c'"
restore

* (2) labor market status x control, variables carry the suffix _bc
preserve
collapse (count) n_taxpayers_bc=snp_ahvnr ///
	(mean) cant=gdekannr steink_bc=snpsteink reink_bc=snpreink stbetr_bc=snpstbetr ///
	(sem) steink_se_bc=snpsteink reink_se_bc=snpreink stbetr_se_bc=snpstbetr ///
	, by(year snp_gdenr control snpbercd)
sort year snp_gdenr
rename control control_bc
compress
tempfile A2_2c
save "`A2_2c'"
restore

* (3) snp_stacd x control, variables carry the suffix _tc
* This preserve stays open until the merged file has been saved; the restore at the
* end of the section brings the micro data back.
preserve
collapse (count) n_taxpayers_tc=snp_ahvnr ///
	(mean) cant=gdekannr steink_tc=snpsteink reink_tc=snpreink stbetr_tc=snpstbetr ///
	(sem) steink_se_tc=snpsteink reink_se_tc=snpreink stbetr_se_tc=snpstbetr ///
	, by(year snp_gdenr control snp_stacd)
sort year snp_gdenr
rename control control_tc
compress
tempfile A2_3c
save "`A2_3c'"

* Combine the three pieces side by side.
* NOTE(review): year and snp_gdenr do not uniquely identify rows in any of the three
* files, so merge m:m pairs rows by their position within each key and reuses the last
* row of the shorter side. Rows of the different categories are therefore aligned
* arbitrarily - confirm that downstream code only reads each suffix group on its own.
use "`A2_1c'", clear
merge m:m year snp_gdenr using "`A2_2c'", nogenerate
merge m:m year snp_gdenr using "`A2_3c'", nogenerate
save "$yourpath/Datasets/allcant_taxpayers_coll_A_control.dta", replace
restore



* Variante B: collapse variable by variable and merge the tempfiles together into one dataset *
* Six collapsed tempfiles are built, each by year, snp_gdenr and at most one further
* variable. Every file holds the taxpayer count, cant (group mean of gdekannr) and the
* mean and standard error of the mean of the three monetary variables.

* civil status (suffix _z)
preserve
collapse (count) n_taxpayers_z=snp_ahvnr ///
	(mean) cant=gdekannr steink_z=snpsteink reink_z=snpreink stbetr_z=snpstbetr ///
	(sem) steink_sem_z=snpsteink reink_sem_z=snpreink stbetr_sem_z=snpstbetr ///
	, by(year snp_gdenr snpzivcd)
sort year snp_gdenr
compress
tempfile dta_1
save "`dta_1'"
restore

* labor market status (suffix _b)
preserve
collapse (count) n_taxpayers_b=snp_ahvnr ///
	(mean) cant=gdekannr steink_b=snpsteink reink_b=snpreink stbetr_b=snpstbetr ///
	(sem) steink_sem_b=snpsteink reink_sem_b=snpreink stbetr_sem_b=snpstbetr ///
	, by(year snp_gdenr snpbercd)
sort year snp_gdenr
compress
tempfile dta_2
save "`dta_2'"
restore

* snp_stacd, labelled steuerart in the original script (suffix _t)
preserve
collapse (count) n_taxpayers_t=snp_ahvnr ///
	(mean) cant=gdekannr steink_t=snpsteink reink_t=snpreink stbetr_t=snpstbetr ///
	(sem) steink_sem_t=snpsteink reink_sem_t=snpreink stbetr_sem_t=snpstbetr ///
	, by(year snp_gdenr snp_stacd)
sort year snp_gdenr
compress
tempfile dta_3
save "`dta_3'"
restore

* treatment group (suffix _tr)
preserve
collapse (count) n_taxpayers_tr=snp_ahvnr ///
	(mean) cant=gdekannr steink_tr=snpsteink reink_tr=snpreink stbetr_tr=snpstbetr ///
	(sem) steink_sem_tr=snpsteink reink_sem_tr=snpreink stbetr_sem_tr=snpstbetr ///
	, by(year snp_gdenr treat)
sort year snp_gdenr
compress
tempfile dta_5
save "`dta_5'"
restore

* control group (suffix _ctr)
preserve
collapse (count) n_taxpayers_ctr=snp_ahvnr ///
	(mean) cant=gdekannr steink_ctr=snpsteink reink_ctr=snpreink stbetr_ctr=snpstbetr ///
	(sem) steink_sem_ctr=snpsteink reink_sem_ctr=snpreink stbetr_sem_ctr=snpstbetr ///
	, by(year snp_gdenr control)
sort year snp_gdenr
compress
tempfile dta_5c
save "`dta_5c'"
restore

* total per year and municipality (suffix _tot)
* This preserve stays open until the merged file has been saved; the restore at the
* end of the section brings the micro data back.
preserve
collapse (count) n_taxpayers_tot=snp_ahvnr ///
	(mean) cant=gdekannr steink_tot=snpsteink reink_tot=snpreink stbetr_tot=snpstbetr ///
	(sem) steink_sem_tot=snpsteink reink_sem_tot=snpreink stbetr_sem_tot=snpstbetr ///
	, by(year snp_gdenr)
sort year snp_gdenr
compress
tempfile dta_6
save "`dta_6'"

* merge everything together
* NOTE(review): apart from the totals file, year and snp_gdenr do not uniquely identify
* rows in these files, so merge m:m pairs rows by their position within each key and
* reuses the last row of the shorter side. Rows of the different categories are
* therefore aligned arbitrarily - confirm that downstream code only reads each suffix
* group on its own.
use "`dta_1'", clear
merge m:m year snp_gdenr using "`dta_2'", nogenerate
merge m:m year snp_gdenr using "`dta_3'", nogenerate
merge m:m year snp_gdenr using "`dta_5'", nogenerate
merge m:m year snp_gdenr using "`dta_5c'", nogenerate
merge m:m year snp_gdenr using "`dta_6'", nogenerate
save "$yourpath/Datasets/allcant_taxpayers_coll_B.dta", replace
restore
